{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# IsolationForest example\n\n\nAn example using :class:`sklearn.ensemble.IsolationForest` for anomaly\ndetection.\n\nThe IsolationForest 'isolates' observations by randomly selecting a feature\nand then randomly selecting a split value between the maximum and minimum\nvalues of the selected feature.\n\nSince recursive partitioning can be represented by a tree structure, the\nnumber of splittings required to isolate a sample is equivalent to the path\nlength from the root node to the terminating node.\n\nThis path length, averaged over a forest of such random trees, is a measure\nof normality and our decision function.\n\nRandom partitioning produces noticeable shorter paths for anomalies.\nHence, when a forest of random trees collectively produce shorter path lengths\nfor particular samples, they are highly likely to be anomalies.\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.ensemble import IsolationForest\n\nrng = np.random.RandomState(42)\n\n# Generate train data\nX = 0.3 * rng.randn(100, 2)\nX_train = np.r_[X + 2, X - 2]\n# Generate some regular novel observations\nX = 0.3 * rng.randn(20, 2)\nX_test = np.r_[X + 2, X - 2]\n# Generate some abnormal novel observations\nX_outliers = rng.uniform(low=-4, high=4, size=(20, 2))\n\n# fit the model\nclf = IsolationForest(max_samples=100, random_state=rng)\nclf.fit(X_train)\ny_pred_train = clf.predict(X_train)\ny_pred_test = clf.predict(X_test)\ny_pred_outliers = clf.predict(X_outliers)\n\n# plot the line, the samples, and the nearest vectors to the plane\nxx, yy = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(-5, 5, 50))\nZ = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\nZ = Z.reshape(xx.shape)\n\nplt.title(\"IsolationForest\")\nplt.contourf(xx, yy, Z, cmap=plt.cm.Blues_r)\n\nb1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white',\n                 s=20, edgecolor='k')\nb2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green',\n                 s=20, edgecolor='k')\nc = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red',\n                s=20, edgecolor='k')\nplt.axis('tight')\nplt.xlim((-5, 5))\nplt.ylim((-5, 5))\nplt.legend([b1, b2, c],\n           [\"training observations\",\n            \"new regular observations\", \"new abnormal observations\"],\n           loc=\"upper left\")\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}